rm(list=ls(all=TRUE))

####### For creating plots with same format as original - can skip and create plots anyway
if(FALSE){
require(extrafont)
require(ggokabeito)
library(myriad)

loadfonts(device = "win")
theme_figs <- function(){
  theme_myriad_semi(base_family = fonts()[1],
                    subtitle_family = fonts()[1],
                    caption_family = fonts()[1],) +
    theme(
      plot.background = element_rect(color = "white"),
      plot.title = element_text(size = rel(1.5)),
      plot.subtitle = element_text(size = rel(1.2)),
      axis.title.x = element_text(size = rel(1.25)),
      axis.title.y = element_text(size = rel(1.25)),
      axis.text.y.left = element_text(size = rel(1.2)),
      axis.text.y.right = element_text(size = rel(0.9)))
}
theme_set(theme_figs())
## Colors for manual use as needed
my_oka <- palette_okabe_ito(order = c(1, 2, 3, 5, 7),
                            alpha = NULL)
}

################################################################################
################################## CALIFORNIA ##################################
################################################################################

ca <- fread("Data/ca_votes.csv")
ca2 <- fread("Data/ca_votes_updated.csv")

ca <- rbind.data.frame(ca , ca2)

### import senators' party
ca_senators <- fread("Data/ca_senators_FH_data_W_PARTY.csv")
### fix a name in the updated ca votes data
ca$name[ca$name=="Lena Gonzalez"] <- "Gonzalez"

ca <- join(ca , ca_senators)

ca <-subset(ca , location=="Senate Floor" & name!="")
ca$name[ca$name=="Runner" & ca$session%in%c("2011-2012","2015-2016")] <- "RunnerS"
### Sharon Runner succeeded her husband George

#### pulling in dates of service - legislators suspended but who have not resigned
### are considered still part of the legislature - they are still taking a spot,
### many more resignations / suspensions than in other states
### use dates of service to determine overall roll call order
service <- read.csv("Data/ca_senators_FH_service.csv" , stringsAsFactors = FALSE)
str(service)

service$date_begin <- as.Date(service$date_begin , "%m/%d/%Y")
service$date_end <- as.Date(service$date_end , "%m/%d/%Y")
head(service)
tail(service)
str(service)
dim(ca)
ca$date <- as.Date(ca$date , "%m/%d/%y")
### weird vote in 2007 that shows up in other sessions
ca <- subset(ca , !(date=="2007-04-09" &
                      session%in%c("1999-2000","2001-2002","2003-2004",
                                   "2005-2006")))


member_dates <- data.frame(matrix(NA , nrow = 0 , ncol = 2))

for(i in 1:nrow(service)){
  temp1 <- seq(service$date_begin[i] ,service$date_end[i] , 1)
  member_dates <- rbind.data.frame(member_dates,
                                   data.frame(name = service$name[i],
                                              date = temp1))
}

clusters <- ddply(member_dates, .(date) , function(y){
  temp = sort(unique(y$name))
  members = paste0(temp , collapse = "_")
  n_members = length(temp)
  date = y$date[1]
  return(data.frame(members , n_members , date))
})

clusters <- clusters[order(clusters$date),]

clusters_unique <- data.frame(members = unique(clusters$members)) %>%
  mutate(. , cluster = c(1:nrow(.)))

### now assign vote order to each cluster
name_order <- data.frame(matrix(NA , nrow = 0 , ncol = 3))
for(i in c(1:nrow(clusters_unique))){
  temp1 <- str_split(clusters_unique$members[i] , "_" , simplify = TRUE)[1,]
  temp2 <- temp1[order(temp1)]
  rank <- seq_along(temp2)
  #  rank_p <- (rank-1)/39
  name_order <- rbind.data.frame(name_order ,
                                 data.frame(name = temp2,
                                            rank,
                                            #                    rank_p,
                                            cluster = i))
}

clusters <- join(clusters, clusters_unique , by = "members")

ca <- join(ca , subset(clusters , select = c("date" , "cluster")) ,
           by = c("date"))
ca <- join(ca , name_order,
           by = c("name","cluster"))

ca %>% with(. , mean(is.na(rank)))
ca %>% with(. , mean(is.na(cluster)))
### GREAT! everyone has a ranking. Can start analysis.

ca <- ca %>%
  group_by(session,bill,date,motion) %>%
  dplyr::mutate(rollcall = cur_group_id())

ca <- ddply(ca , .(rollcall) , mutate , n_voters = length(name))
table(ca$n_voters)
### Question - how to deal with votes where many are missing? Subset to full chamber?
ca <- subset(ca , n_voters> 3 & n_voters<=40)
### drop observatoins with errors (where I can't correctly order legislators) and the one with 3 votes

### Use only legislators who voted to determine party position
ca <- ddply(ca , .(rollcall) , mutate ,
            dem_yes =  mean(vote[party=="D" & vote%in%c("yes","no")]=="yes"),
            rep_yes =  mean(vote[party=="R" & vote%in%c("yes","no")]=="yes"))
### treating NVR as no votes here implicitly
sum(is.na(ca$dem_yes))
sum(is.na(ca$rep_yes))
### some votes no reps are voting; clearly the position is against the bill
ca$rep_yes[is.na(ca$rep_yes)] <- 0
ca$party_vote <- "yes"
ca$party_vote[ca$party=="D" & ca$dem_yes < 0.5] <- "no"
ca$party_vote[ca$party=="R" & ca$rep_yes < 0.5] <- "no"

ca$deviate <- as.numeric(ca$vote!=ca$party_vote)

### clean up names
ca <- as.data.table(ca)
ca$name <- tolower(ca$name)
ca <- ca[order(ca$rollcall , ca$name),]
ca <- ca[, `:=` (rank_p_overall = (seq_along(name)-1)/(length(name)-1),
                 dem_yes =  sum(na.omit(vote[party=="D"]=="yes")),
                 rep_yes =  sum(na.omit(vote[party=="R"]=="yes")),
                 tot_yes =  sum(na.omit(vote=="yes")),
                 dem_voters = sum(vote[party=="D"]%in%c("no","yes")),
                 rep_voters = sum(vote[party=="R"]%in%c("no","yes")),
                 tot_voters = sum(vote%in%c("no","yes"))
) , by = rollcall ]

### close votes
ca <- mutate(ca , mean_yea = tot_yes / tot_voters)
mean(ca$tot_yes==ca$ayes)
### nearly all align with official website

ca$close <- ifelse(ca$mean_yea >= 0.35 & ca$mean_yea <= 0.65 , 1 , 0)
mean(ca$close)

ca2 <- subset(ca , vote%in%c("yes","no")) %>% as.data.table()
ca2 <- ca2[order(ca2$rollcall,ca2$name),]
ca2 <- ca2[, `:=` (rank_p = (seq_along(name)-1)/(length(name)-1)
) , by = rollcall ]


### randomization inference
nsims <- 10000
output <- output_rc <- data.frame(matrix(NA , nrow = nsims , ncol = 24))

names.unique <- unique(ca$name)

set.seed(12345)
for(i in 1:nsims){
  temp <- data.frame(name = names.unique,
                     name2 = sample(names.unique , length(names.unique) , replace = FALSE))
  ca3 <- join(ca , temp , by = "name")
  for(j in 1:2){
    tempout <- data.frame(matrix(NA , nrow = 1 , ncol = 24))
    
    ### to get roll call specific order without those not voting
    if(j==1) ca3a <- ca3 %>% as.data.table()
    if(j==2) ca3a <- subset(ca3 , vote%in%c("yes","no")) %>% as.data.table()
    ca3a <- ca3a[order(ca3a$rollcall,ca3a$name2),]
    ca3a <- ca3a[, `:=` (rank_p_rc = (seq_along(name2)-1)/(length(name2)-1)) , by = rollcall ]
    # ca3a <- ddply(ca3a , .(rollcall) , mutate ,
    #              rank_p_rc = (seq_along(name2)-1)/(length(name2)-1))
    ca3a <- subset(ca3a , vote%in%c("yes","no"))
    
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,1:4] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,5:8] <- ttemp1
    ### close votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,9:12] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,13:16] <- ttemp1
    ### lopsided votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,17:20] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,21:24] <- ttemp1
    
    if(j==1) output[i,] <- tempout
    if(j==2) output_rc[i,] <- tempout
  }
  cat("\r", i, "of", nsims) ### How far loop has progressed
}

save(ca , ca2 , output , output_rc ,
     file = "Data/Temp Files/ca_RI_sims.RData")

##################################################################################
load(file = "Data/Temp Files/ca_RI_sims.RData")

### BY roll call vote

estimate <- feols(deviate ~ rank_p  | name, data = ca2) %>%
  summary(. , cluster = "session") %>% coeftable()

### plot cdf
### Change 'standardization' - make it so change in X is 1 spot in legislature, y is % deviate
#deltax <- 100*1/39
deltax <- 1
g1 <- ggplot(output_rc, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.475,
                              y1 = mean(output_rc$X1 <=  estimate[1,1] + 0.0035),
                              label1 = round(mean(output_rc$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))



estimate <- feols(deviate ~ rank_p  | interaction(name , session), data = ca2) %>%
  summary(. , cluster = "session") %>% coeftable()

g2 <- ggplot(output_rc, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.475,
                              y1 = mean(output_rc$X5 <=  estimate[1,1] + 0.0075),
                              label1 = round(mean(output_rc$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))

require(ggpubr)
pdf(
  "Generated Files/Figure 6b.pdf",
  height = 4,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()

### BY overall order


estimate <- feols(deviate ~ rank_p_overall  | name, data = ca2) %>%
  summary(. , cluster = "session") %>% coeftable()

g1 <- ggplot(output, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  geom_hline(yintercept = mean(output$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.475,
                              y1 = mean(output$X1 <=  estimate[1,1] + 0.0035),
                              label1 = round(mean(output$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))


estimate <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = ca2) %>%
  summary(. , cluster = "session") %>% coeftable()

g2 <- ggplot(output, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.475,
                              y1 = mean(output$X5 <=  estimate[1,1] + 0.03),
                              label1 = round(mean(output$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))

require(ggpubr)
pdf(
  "Generated Files/Figure 6b overall.pdf",
  height = 5,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()

################################################################################
##################################  ARKANSAS  ##################################
################################################################################

ar <- fread("Data/ar_votes.csv" , fill = TRUE)

ar2 <- fread("Data/ar_votes_updated.csv" , fill = TRUE)
ar2 <- mutate(ar2 , motion = "missing",
              location = "floor",
              chamber = tolower(chamber)) %>%
  subset(. , select = c("session","chamber","location","bill","date","motion",
                        "name","vote","url"))

ar <- rbind.data.frame(ar , ar2)

ar <- ar %>%
  subset(. , chamber=="senate" & location=="floor") %>%
  group_by(session,bill,date,motion) %>%
  dplyr::mutate(rollcall = cur_group_id())
length(unique(ar$rollcall))
ddply(ar , .(session , rollcall) , nrow) %>% with(. , table(session , V1))
### 2015 session only had 34 people

### add senators' partisanship
ar_senators <- fread("Data/ar_senators_FH_data_W_PARTY.csv")
ar <- join(ar , ar_senators) %>%
  subset(. , name!="//first remove any spacer divs that might exist, should only happen on browser resizing") %>%
  mutate(. , name= name_alph) %>%
  as.data.frame(.)


ar <- ddply(ar , .(rollcall) , mutate , n_voters = length(rollcall))
table(ar$n_voters)
### drop votes that have more than 35 members
ar <- subset(ar , n_voters <= 35)

ar <- ddply(ar , .(rollcall) , mutate ,
            dem_yes =  mean(vote[party=="D" & vote%in%c("yes","no")]=="yes"),
            rep_yes =  mean(vote[party=="R" & vote%in%c("yes","no")]=="yes"))
### a few in which all dems/reps were not voting - make party position against
ar$dem_yes[is.na(ar$dem_yes)] <- 0
ar$rep_yes[is.na(ar$rep_yes)] <- 0
mean(ar$dem_yes > 0.5)
mean(ar$rep_yes > 0.5)


ar$party_vote <- "yes"
ar$party_vote[ar$party=="D" & ar$dem_yes < 0.5] <- "no"
ar$party_vote[ar$party=="R" & ar$rep_yes < 0.5] <- "no"
table(ar$party_vote , ar$vote)

ar$deviate <- as.numeric(ar$vote!=ar$party_vote)
table(ar$deviate)


ar <- as.data.table(ar) %>% mutate(. , name==tolower(name))
ar <- ar[order(ar$rollcall,ar$name),]
ar <- ar[, `:=` (rank_p_overall = (seq_along(name)-1)/(length(name)-1),
                 tot_yes =  sum(vote[vote%in%c("yes","no")]=="yes"),
                 tot_voters = sum(vote%in%c("yes","no"))
) , by = rollcall ] %>%
  mutate(. , mean_yea = tot_yes / tot_voters)

ar$close <- ifelse(ar$mean_yea >= 0.35 & ar$mean_yea <= 0.65 , 1 , 0)

ar2 <- subset(ar , vote%in%c("yes","no")) %>% as.data.table(.)
ar2 <- ar2[order(ar2$rollcall,ar2$name),]
ar2 <- ar2[, `:=` (rank_p = (seq_along(name)-1)/(length(name)-1)
) , by = rollcall ]


### randomization inference
nsims <- 10000
output <- output_rc <- data.frame(matrix(NA , nrow = nsims , ncol = 24))

names.unique <- unique(ar$name)

set.seed(12345)
for(i in 1:nsims){
  temp <- data.frame(name = names.unique,
                     name2 = sample(names.unique , length(names.unique) , replace = FALSE))
  ca3 <- join(ar , temp , by = "name")
  for(j in 1:2){
    tempout <- data.frame(matrix(NA , nrow = 1 , ncol = 24))
    
    ### to get roll call specific order without those not voting
    if(j==1) ca3a <- ca3 %>% as.data.table()
    if(j==2) ca3a <- subset(ca3 , vote%in%c("yes","no")) %>% as.data.table()
    ca3a <- ca3a[order(ca3a$rollcall,ca3a$name2),]
    ca3a <- ca3a[, `:=` (rank_p_rc = (seq_along(name2)-1)/(length(name2)-1)) , by = rollcall ]
    ca3a <- subset(ca3a , vote%in%c("yes","no"))
    
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,1:4] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,5:8] <- ttemp1
    ### close votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,9:12] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,13:16] <- ttemp1
    ### lopsided votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,17:20] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,21:24] <- ttemp1
    
    if(j==1) output[i,] <- tempout
    if(j==2) output_rc[i,] <- tempout
  }
  cat("\r", i, "of", nsims) ### How far loop has progressed
}

save(ar , ar2 , output , output_rc ,
     file = "Data/Temp Files/ar_RI_sims.RData")

##################################################################################
load(file = "Data/Temp Files/ar_RI_sims.RData")



### by roll call

estimate <- feols(deviate ~ rank_p  | name, data = ar2) %>%
  summary(. , cluster = "session")  %>% coeftable()

deltax <- 1
g1 <- ggplot(output_rc, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.475,
                              y1 = mean(output_rc$X1 <=  estimate[1,1] + 0.0075),
                              label1 = round(mean(output_rc$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))


estimate <- feols(deviate ~ rank_p  | interaction(name , session), data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()

g2 <- ggplot(output_rc, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.8,
                              y1 = mean(output_rc$X5 <=  estimate[1,1] + 0.02),
                              label1 = round(mean(output_rc$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-1,1),breaks = seq(-1,1,0.5))

require(ggpubr)
pdf(
  "Generated Files/Figure 6a.pdf",
  height = 4,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()

estimate <- feols(deviate ~ rank_p_overall  | name, data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()

deltax <- 1
g1 <- ggplot(output, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -0.475,
                              y1 = mean(output$X1 <=  estimate[1,1] + 0.0075),
                              label1 = round(mean(output$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))


estimate <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()

g2 <- ggplot(output, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -.8,
                              y1 = mean(output$X5 <=  estimate[1,1] + 0.04),
                              label1 = round(mean(output$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-1,1),breaks = seq(-1,1,0.5))


require(ggpubr)
pdf(
  "Generated Files/Figure 6a overall.pdf",
  height = 4,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()


################################################################################
################################ SOUTH DAKOTA  #################################
################################################################################

ar <- fread("Data/sd_votes.csv" , fill = TRUE) %>% as.data.frame()

ar <- ar %>%
  subset(. , chamber=="senate" & location=="Senate") %>%
  group_by(session,bill,date,motion) %>%
  dplyr::mutate(rollcall = cur_group_id()) %>% as.data.frame()
length(unique(ar$rollcall))
ddply(ar , .(session , rollcall) , nrow) %>% with(. , table(session , V1))
### looks good. drop 2011s session - special
ar <- subset(ar , session!="2011s")


ar_senators <- fread("Data/sd_senators_FH_data_W_PARTY.csv")


dim(ar)
ar <- join(ar , ar_senators) %>% subset( . , !is.na(party))
dim(ar)
sum(is.na(ar$party))
### Dropping a handful of "President"s


### fix names - this is different from version sent to BIW - just affects fixed effects, not name order
ar$name[ar$name=="Greenfield (Brock)"] <- "Greenfield"
ar$name[ar$name=="Ham-Burr"] <- "Ham"
ar$name[ar$name=="Schmidt (Dennis)"] <- "Schmidt"
ar$name[ar$name=="Smidt (Orville)"] <- "Smidt"
ar$name[ar$name=="Turbak Berry"] <- "Turbak"
ar$name[ar$name=="Sutton" & ar$session%in%c("2011","2012","2013","2014","2015","2016")] <- "Sutton (Billie)"
ar$name[ar$name=="Sutton" & ar$session%in%c("2007","2008")] <- "Sutton (Dan)"


### nearly all votes have full chamber - which, in 2015, was only 34. So no
### need for complicated name order and stuff

clusters <- ddply(ar, .(session) , function(y){
  temp = sort(unique(y$name))
  members = paste0(temp , collapse = "_")
  n_members = length(temp)
  return(data.frame(members , n_members))
})

table(clusters$session , clusters$n_members)
## kurtenbach appointed to fill Diedrich's term; only case of mid-session change

ar <- join(ar , subset(clusters , select = c("session","n_members")))

ar$date <- as.Date(ar$date , "%m/%d/%Y")
ar <- ddply(ar , .(rollcall) , mutate ,
            n_voters0 = length(name))

### there are incorrect number of total votes for some issues -
### some motions / bills show up with 2-19x the total size of legislature
### problem with initial dataset; 95% of votes have 35 members, 1% 34.
ar <- subset(ar , vote!="" & n_voters0 <= 35)

ar <- as.data.table(ar) %>% mutate(. , name = tolower(name))

ar <- ar[order(ar$rollcall,ar$name),]
ar <- ar[, `:=` (rank_p_overall = (seq_along(name)-1)/(length(name)-1),
                 tot_yes =  sum(vote[vote%in%c("Yea","Nay")]=="Yea"),
                 tot_voters = sum(vote%in%c("Yea","Nay"))
) , by = rollcall ] %>%
  mutate(. , mean_yea = tot_yes / tot_voters)

ar$close <- ifelse(ar$mean_yea >= 0.35 & ar$mean_yea <= 0.65 , 1 , 0)
mean(ar$close)

### drop pure missing votes
ar$vote_missing <- 0
ar$vote_missing[ar$vote=="Excused"] <- 1
ar$party[ar$party=="Democratic"] <- "D"
ar$party[ar$party=="Republican"] <- "R"

ar <- ddply(ar , .(rollcall) , mutate ,
            dem_yes =  mean(vote[party=="D" & vote%in%c("Yea","Nay")]=="Yea"),
            rep_yes =  mean(vote[party=="R" & vote%in%c("Yea","Nay")]=="Yea"))
mean(ar$dem_yes > 0.5)
mean(ar$rep_yes > 0.5)

ar$party_vote <- "Yea"
ar$party_vote[ar$party=="D" & ar$dem_yes < 0.5] <- "Nay"
ar$party_vote[ar$party=="R" & ar$rep_yes < 0.5] <- "Nay"
table(ar$party_vote , ar$vote)

ar$deviate <- as.numeric(ar$vote!=ar$party_vote)
table(ar$deviate)

ar2 <- subset(ar , vote%in%c("Yea","Nay")) %>% as.data.table(.)
ar2 <- ar2[order(ar2$rollcall,ar2$name),]
ar2 <- ar2[, `:=` (rank_p = (seq_along(name)-1)/(length(name)-1)
) , by = rollcall ]



### randomization inference
nsims <- 10000
output <- output_rc <- data.frame(matrix(NA , nrow = nsims , ncol = 24))

names.unique <- unique(ar$name)

set.seed(12345)
for(i in 1:nsims){
  temp <- data.frame(name = names.unique,
                     name2 = sample(names.unique , length(names.unique) , replace = FALSE))
  ca3 <- join(ar , temp , by = "name")
  for(j in 1:2){
    tempout <- data.frame(matrix(NA , nrow = 1 , ncol = 24))
    
    ### to get roll call specific order without those not voting
    if(j==1) ca3a <- ca3 %>% as.data.table()
    if(j==2) ca3a <- subset(ca3 , vote%in%c("Yea","Nay")) %>% as.data.table()
    ca3a <- ca3a[order(ca3a$rollcall,ca3a$name2),]
    ca3a <- ca3a[, `:=` (rank_p_rc = (seq_along(name2)-1)/(length(name2)-1)) , by = rollcall ]
    # ca3a <- ddply(ca3a , .(rollcall) , mutate ,
    #              rank_p_rc = (seq_along(name2)-1)/(length(name2)-1))
    ca3a <- subset(ca3a , vote%in%c("Yea","Nay"))
    
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,1:4] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = ca3a)
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,5:8] <- ttemp1
    ### close votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,9:12] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==1))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,13:16] <- ttemp1
    ### lopsided votes
    ttemp <- feols(deviate ~ rank_p_rc  | name2, data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,17:20] <- ttemp1
    ttemp <- feols(deviate ~ rank_p_rc  | interaction(name2 , session), data = subset(ca3a , close==0))
    ttemp1 <- summary(ttemp , cluster = "session") %>% coeftable(.)
    tempout[1,21:24] <- ttemp1
    
    if(j==1) output[i,] <- tempout
    if(j==2) output_rc[i,] <- tempout
  }
  cat("\r", i, "of", nsims) ### How far loop has progressed
}

save(ar , ar2 , output , output_rc ,
     file = "Data/Temp Files/sd_RI_sims.RData")

##################################################################################
load(file = "Data/Temp Files/sd_RI_sims.RData")



estimate <- feols(deviate ~ rank_p  | name, data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()
#### by roll call vote
#deltax <- 100*1/34
g1 <- ggplot(output_rc, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -0.475,
                              y1 = mean(output_rc$X1 <=  estimate[1,1] + 0.035),
                              label1 = round(mean(output_rc$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))


estimate <- feols(deviate ~ rank_p  | interaction(name , session), data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()
g2 <- ggplot(output_rc, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output_rc$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -0.475,
                              y1 = mean(output_rc$X5 <=  estimate[1,1] + 0.075),
                              label1 = round(mean(output_rc$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))

require(ggpubr)
pdf(
  "Generated Files/Figure 6c.pdf",
  height = 4,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()



estimate <- feols(deviate ~ rank_p_overall  | name, data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()

### plot cdf
deltax <- 1
g1 <- ggplot(output, aes(X1*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg FE",
    y = "Cumulative Probability", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output$X1 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -0.475,
                              y1 = mean(output$X1 <=  estimate[1,1] + 0.01),
                              label1 = round(mean(output$X1 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-0.5,0.5),breaks = seq(-0.5,0.5,0.25))


estimate <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = ar2) %>%
  summary(. , cluster = "session") %>% coeftable()
g2 <- ggplot(output, aes(X5*deltax)) +
  stat_ecdf(geom = "step")+
  labs(#title="",
    subtitle = "Leg by Assembly FE",
    y = "", x="Alphabetical Rank Estimate")+
  #  theme_classic()+
  geom_hline(yintercept = mean(output$X5 <=  estimate[1,1]),
             col = "black" , linetype="longdash") +
  geom_text(data = data.frame(x1 =  -1.8,
                              y1 = mean(output$X5 <=  estimate[1,1] + 0.07),
                              label1 = round(mean(output$X5 <=  estimate[1,1]),3)),
            aes(x = x1, y = y1, label = label1))+
  scale_y_continuous(limits = c(0,1),breaks = seq(0,1,0.1))+
  geom_vline(xintercept = estimate[1,1]*deltax,
             col = "black" , linetype="longdash") +
  scale_x_continuous(limits = c(-2,2),breaks = seq(-2,2,1))

pdf(
  "Generated Files/Figure 6c overall.pdf",
  height = 4,
  width = 10,
  onefile = FALSE#,
  #  family = 'Myriad Pro'
)
ggarrange(g1, g2,
          nrow = 1, ncol = 2)
dev.off()



################################################################################
#######################         CREATE TABLES       ############################
################################################################################

### Table 2
table2 <- matrix(NA , nrow = 6 , ncol = 4)

for(i in c("ar","ca","sd")){
  load(file = paste0("Data/Temp Files/",i,"_RI_sims.RData"))
  if(i %in% c("ar","sd")) data <- ar2
  if(i=="ca") data <- ca2

  j <- match(i , c("ar","ca","sd"))
  
  reg <- feols(deviate ~ rank_p  | name, data = data) %>%
  summary(. , cluster = "session") %>% coeftable()
  table2[2*j-1,1] <- reg[1,1]
  table2[2*j,1] <- mean(abs(output_rc[,1]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p  | interaction(name , session), data = data) %>%
  summary(. , cluster = "session") %>% coeftable()
  table2[2*j-1,2] <- reg[1,1]
  table2[2*j,2] <- mean(abs(output_rc[,5]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | name, data = data) %>%
  summary(. , cluster = "session") %>% coeftable()
  table2[2*j-1,3] <- reg[1,1]
  table2[2*j,3] <- mean(abs(output[,1]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = data) %>%
  summary(. , cluster = "session") %>% coeftable()
  table2[2*j-1,4] <- reg[1,1]
  table2[2*j,4] <- mean(abs(output[,5]) >= abs(reg[1,1]))
  
}

sink("Generated Files/Replicated Table 2.txt")
table2 %>% round(3)
sink(file = NULL)

### us house results in script 5



### Table 3
table3 <- matrix(NA , nrow = 12 , ncol = 4)

for(i in c("ar","ca","sd")){
  load(file = paste0("Data/Temp Files/",i,"_RI_sims.RData"))
  if(i %in% c("ar","sd")) data <- ar2
  if(i=="ca") data <- ca2
  
  j <- match(i , c("ar","ca","sd"))
  
  reg <- feols(deviate ~ rank_p  | name, data = subset(data , close==1)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-3,1] <- reg[1,1]
  table3[4*j-2,1] <- mean(abs(output_rc[,9]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p  | interaction(name , session), data = subset(data , close==1)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-3,2] <- reg[1,1]
  table3[4*j-2,2] <- mean(abs(output_rc[,13]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | name, data = subset(data , close==1)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-3,3] <- reg[1,1]
  table3[4*j-2,3] <- mean(abs(output[,9]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = subset(data , close==1)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-3,4] <- reg[1,1]
  table3[4*j-2,4] <- mean(abs(output[,13]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p  | name, data = subset(data , close==0)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-1,1] <- reg[1,1]
  table3[4*j,1] <- mean(abs(output_rc[,17]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p  | interaction(name , session), data = subset(data , close==0)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-1,2] <- reg[1,1]
  table3[4*j,2] <- mean(abs(output_rc[,21]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | name, data = subset(data , close==0)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-1,3] <- reg[1,1]
  table3[4*j,3] <- mean(abs(output[,17]) >= abs(reg[1,1]))
  
  reg <- feols(deviate ~ rank_p_overall  | interaction(name , session), data = subset(data , close==0)) %>%
    summary(. , cluster = "session") %>% coeftable()
  table3[4*j-1,4] <- reg[1,1]
  table3[4*j,4] <- mean(abs(output[,21]) >= abs(reg[1,1]))
  
}

sink("Generated Files/Replicated Table 3.txt")
table3 %>% round(3)
sink(file = NULL)

### us house results in script 5